Link

New Functionalities

Many of the interesting conclusions that can be drawn from this dataset are associated with:

  1. Product price variability over time.
  2. Product availability over time.
  3. Product offer prices variability over time.

For people who don’t have access to the entire database, some new functionalities for the detection of variability of those variables could be useful. Here I propose some of them.

Download Jupyter Notebook for this section


Table of contents

  1. Price variability by product
    1. Example
  2. Price variability by retailer
    1. Example
  3. I’m feeling lucky
    1. Example
  4. Plot price vs time
    1. Example

Price variability by product

This function allows the user to know how many times the price (and availability and price promotion if apply) of a product changed in the sampling time period.

product_variability({product ID})

product_variability <- function(prod,price_var=1){

    date_range <- function(date1,date2){        
        date1= format(as.Date(date1, format = "%Y-%m-%d"), "%b-%y")
        date2= format(as.Date(date2, format = "%Y-%m-%d"), "%b-%y")
        drange <- paste(date2,date1,sep="-")       
        return(drange)
    }

    price_range <- function(price_min,price_max){        
        prange <- paste(price_min,price_max,sep="-")     
        return(prange)
    }

    #Detect variability in col column
    variability <- function(df, col,product){
        df <- subset(df, !is.na(df[[col]]))
        var=0
        if (nrow(df)>1){
            row_value <- df[1, col]
            for (row in 2:nrow(df)){
                if(df[row, col]!= row_value) {
                row_value <- df[row, col]
                var <- var + 1   
                }                                                        
            }
        }
        return(var)
    }

    #Get prices for product ID
    p_prices<-product_prices(prod)

    #Get how many times the price changes over the date range
    p_var <- variability(p_prices,"price",prod)

    #Get more information if the price change more or equal to price_var times
    if (p_var >= price_var){

        retailer_id <- product(prod)$retailer_id
        product<-product(prod)$product
        total_samples<-nrow(p_prices)

        date_range <- date_range(p_prices$'time'[1],p_prices$'time'[total_samples])
        price_range <- price_range(min(p_prices$'price'),max(p_prices$'price'))

        #Check availability variability
        if (all(is.na(p_prices[['available']]))){
            av_var <- "NA"
        }
        else{    
            av_var <- variability(p_prices,"available",prod)
        }

        #Check promotions variability
        if (all(is.na(p_prices[['price_promotion']]))){
            prom_var <- "NA"
        }
        else{    
            prom_var <- variability(p_prices,"price_promotion",prod)
        }

        #Save the data and convert indo a dataframe
        output<-list(retailer_id,prod,product,price_range,p_var,av_var,prom_var,total_samples,date_range)       
        output<-as.data.frame(output)
        names(output) <- c('retailer_id','product_id','product','price_range','price_variability','avail_variability','prom_variability','total_samples', 'date_range')
    }
    else{
    output <- data.frame()
    }            
    return(output)
}

Example

product_variability(587936)
retailer_idproduct_idproductprice_rangeprice_variabilityavail_variabilityprom_variabilitytotal_samplesdate_range
52587936JVC LT-50N750A 50"FHD SMART L.E.D TV4890-699980NA15Feb-20-May-20

Product 587936 changed price 8 times in the sampling period (Feb-20-May-20). With no variation in availability and no data about promotions. We can check this here:

product_prices(587936)
product_idtimepriceprice_promotionprice_effectiveavailable
5879362020-05-25 00:37:586999NA6999TRUE
5879362020-05-18 00:36:114999NA4999TRUE
5879362020-05-11 05:00:264999NA4999TRUE
5879362020-05-04 00:36:215699NA5699TRUE
5879362020-04-20 00:33:214999NA4999TRUE
5879362020-04-13 00:26:104999NA4999TRUE
5879362020-03-30 00:28:275499NA5499TRUE
5879362020-03-23 00:27:195499NA5499TRUE
5879362020-03-16 00:33:524890NA4890TRUE
5879362020-03-09 00:26:234890NA4890TRUE
5879362020-03-02 00:28:375499NA5499TRUE
5879362020-02-24 00:26:434890NA4890TRUE
5879362020-02-17 00:44:015499NA5499TRUE
5879362020-02-10 00:43:405499NA5499TRUE
5879362020-02-03 00:36:195499NA5499TRUE


Price variability by retailer

This function allows the user to know if the price of n_products products randomly selected for a certain retailer changed more or equal than price_var times in the sampling time period.

retailer_variability({retailer ID or list of retailers IDs}, {price_var}, {n_products})

By default n_products=10 and price_var=1

You can also see the changes (if exist) on availability or price promotion.

retailer_variability <- function(ret_id, price_var=1, n_products=10) {

    output=data.frame()
    var_not_found =c()

    #For each retailer ID, get randomly #n_products products and detect variability
    for (ret in ret_id){
        products <- c(retailer_products(ret) [['product_id']])
        prod_ids <- c(sample(products, n_products, replace=FALSE))

        for (prod in prod_ids){

            getprod <- (product_variability(prod,price_var))
            if (nrow(getprod)>0){
                output <- rbind(output,getprod)
            }
        }                
    }
    #Print details if no variability found
    if (nrow(output)>0){
        var_not_found <- ret_id[!(ret_id %in% output[['retailer_id']])]

        if (length(var_not_found)>1){
            cat("Nothing found for retailers: ",var_not_found, sep=" ")
        }
        else if (length(var_not_found)==1) {
            cat("Nothing found for retailer: ",var_not_found)
        }
    }
    else{
        print("No variability found")    
    }
    output              
}


Example

retailer_variability(71)
retailer_idproduct_idproductprice_rangeprice_variabilityavail_variabilityprom_variabilitytotal_samplesdate_range
711387108Canon EF-S 55-250 mm f4-5.6 IS STM Lens – Lenses – Foto Discount World6100-70004NA410Mar-20-May-20
711385898Defy Cooker Hood 600 STD B (DCH290) | Delivery Charge Excluded1000-11002NA210Mar-20-May-20
711387150Nikon 50mm f1.4G AF-S Lens – Lenses – Foto Discount World8600-108003NA410Mar-20-May-20
711385866Meade Infinity 50mm Altazimuth Refractor + Free Delivery920-10951NANA10Mar-20-May-20
711385491Canon Filter 82mm Protector + Free Delivery1600-17003NA410Mar-20-May-20
711383973Fujifilm XF 35mm F1.4 R Lens – Lenses – Foto Discount World8900-112003NA410Mar-20-May-20
711384171Sigma 16mm f/1.4 DC DN Contemporary Lens for Micro Four Thirds + Free Delivery7100-80003NA410Mar-20-May-20

You can also pass a list of retailers IDs:

retailer_variability(c(71,24,3,52))
Nothing found for retailer:  3
retailer_idproduct_idproductprice_rangeprice_variabilityavail_variabilityprom_variabilitytotal_samplesdate_range
711386116Sigma 52mm WR UV filter + Free Delivery700-8003NA310Mar-20-May-20
711384327Canon Zoemini C – Instant Camera Printer ( Bumblebee Yellow ) – Cameras – Foto Discount World2000-24003NA410Mar-20-May-20
711382284Nikon AF-S 70-200mm f4G ED VR Lens – Lenses – Foto Discount World0-309002NA110Mar-20-May-20
711387643Canon EOS C300 Mk II ( CFast Card ) EF Mount Video Camera – Video Cameras – Foto Discount World139100-1534004NA410Mar-20-May-20
24 96497Arroll 4-Column Cast Iron Radiator 760 x 754mm Cream391.02-409.992NANA16Jan-20-May-20
24 246308Blyss Undertile Heating Mat 1m²19.99-39.992NA016Jan-20-May-20
52 588202AMPLIFY PRO FUSION BT H/P-AMP2011BKBL199.99-299.992NANA15Feb-20-May-20
52 588168OVHD REPLACEMENT REMOTE CONTROL99.99-149.992NANA15Feb-20-May-20
522268183HISENSE H12DESS S/STEEL 12PLC D/WASHE4299-49991NANA 6Apr-20-May-20
52 588005HISENSE WTX1302T 13KG TITANIUM T/L4199-47996NANA14Feb-20-May-20
52 5889143PCE HILTON WALL UNIT3900-44994NANA15Feb-20-May-20
52 588460HP AMD ULTIMATE BUNDLE5690-59999NANA15Feb-20-May-20
52 588001JVC XV-N430B HDMI DVD PLAYER450-6996NANA15Feb-20-May-20
52 588900ALPINE TV STAND1700-19998NANA15Feb-20-May-20
52 588842152CM DREAM THERAPY BASE SET3990-59997NANA15Feb-20-May-20


Note: If you get “Nothing found for retailer {retailer_ID}” it doesn’t mean that there is no product with price variation >= price_var for this retailer. It means that it is no variation for the randomly selected n_products products for that retailer.


I’m feeling lucky

This function allows the user to know if the price of n_products randomly selected products for n_retailers randomly selected retailers changed more or equal than price_var times in the sampling time period.

feeling_lucky_variability({n_retailers}, {price_var}, {n_products})

By default n_retailers=5, n_products=10 and price_var=1

You can also see the change (if exist) on availability or price promotion.

feeling_lucky_variability <- function(n_retailers=5, price_var=1, n_products=10){

    #For all the retailers ID, get randomly #n_retailers retailers
    retailers <- retailer()[['retailer_id']]
    ret_id <- c(sample(retailers, n_retailers, replace=FALSE))

    #Check variability for all the randomly selected retailers
    retailer_variability(ret_id, price_var, n_products)    
}


Example

feeling_lucky_variability()
Nothing found for retailers:  64 86
retailer_idproduct_idproductprice_rangeprice_variabilityavail_variabilityprom_variabilitytotal_samplesdate_range
63 900900Good Grips Pop 2 Rectangular Container - Wide Tall, 3.5L359-36910NA69Feb-20-May-20
63 905964Gourmet Chutney, 250ml49-5210NA69Feb-20-May-20
59 516135Yellow Roses in Designer Box - 12 roses520-6764NANA14Jan-20-May-20
591518697Personalised Polaroid Sweater - L520-5401NANA 9Mar-20-May-20
59 484893Personalised Vintage Pink Fleece Gown Gift - Medium1300-14953NANA 8Jan-20-May-20
592697016HALLS FRUIT DRINK 1.25LT, MANGO'ORANG - Standard43-441NANA 3Apr-20-May-20
59 496175Pink Party Combo Box - Standard390-4051NANA14Jan-20-May-20
59 685649Personalised Gentleman Backsberg Wine - Brut mcc520-5982NANA12Feb-20-May-20
59 636151Pastel Pink Floral Flair in Envelope Box - Purple611-7694NANA12Feb-20-May-20
591224465Personalised Pineapple Beach Towel - Standard390-4051NANA11Mar-20-May-20
59 476149Personalised Fierce Waterbottle And Racerback - Large520-5983NANA13Jan-20-May-20
59 670078Personalised Leaf Foilage Stone Towel Set - Set of 3910-10792NANA11Feb-20-May-20
952245608Nutriday Low Fat Guava Fruit Yoghurt 1kg29.99-32.491NANA41Apr-20-May-20
952732671Denny Country Casserole Flavoured Cook-In-Sauce Pouch 415g26.99-34.991NANA21May-20-May-20
952900189<span style=white-space:pre-wrap>Fresh Cut Berry & Melon Fruit Salad Pack 600g </span>54.99-59.991NANA 7May-20-May-20


Plot price vs time

This function allows the user to get an interactive visualization about the price (and price promotion if applies) vs time of a certain product.

plot_prod_variability({product_id})

library(plotly)
plot_prod_variability<- function(prod){

    #Get data about the product
    p_prices <- product_prices(prod)
    product <- product(prod)$product
    retailer <- product(prod)$retailer_id       

    #Build an interactive plot
    trace_0 <- p_prices[['price']]
    trace_1 <- p_prices[['price_promotion']]

    fig <- plot_ly(p_prices, x = p_prices[['time']])
    fig <- fig %>% add_trace(y = ~trace_0, name = 'price',mode = 'lines')
    fig <- fig %>% add_trace(y = ~trace_1, name = 'price promotion', mode = 'lines')
    fig <- fig %>%
    layout(autosize = F,
           width = 600,
           height = 350,
           margin = list(l=50, r=50, b=100, t=150, pad=4),
           title = list(text = paste0(paste("Product: ",product),
                        '<br>',
                        '<br>',          
                        '<sup>',
                        paste("Product ID: ",toString(prod),", Retailer ID: ",product(prod)$retailer_id),
                        '</sup>')),
           xaxis = list(title="Date"),
           yaxis = list(title="Price")         
          )

    return(fig)
}


Example

plot_prod_variability(1386116)